#!/usr/bin/env bash
#
# Launch stage3.py (train_stage=3) on the somethingv1 dataset, with GPUs
# 4,5,6,7 exposed to the process through CUDA_VISIBLE_DEVICES.
#
# Usage:
#   DATA_DIR=/path/to/dataset \
#   PRETRAINED_S2=/path/to/stage2_checkpoint \
#   bash <this-script>
#
# Environment:
#   DATA_DIR       value passed as data_dir=...
#                  (default: the literal placeholder PATH_TO_DATASET)
#   PRETRAINED_S2  value passed as pretrained_s2=...; the stage2 pretrained
#                  model to load
#                  (default: the literal placeholder
#                  PATH_TO_STAGE2_PRETRAINED_MODEL)
#
# The defaults are placeholders, not usable paths: export the two variables
# (or edit the defaults below) before a real run.
#
# Any command-line arguments given to this script are ignored; the argument
# list handed to stage3.py is exactly the one assembled below.
# The script's exit status is the exit status of the python process.
#
# NOTE(review): multiprocessing_distributed=False is combined with four
# visible GPUs and a dist_url -- presumably stage3.py handles multi-GPU
# some other way in this mode; confirm against stage3.py.
# NOTE(review): backbone_lr=0. presumably keeps the backbone fixed while
# fc_lr drives the update -- confirm against stage3.py.

# Resolve the two site-specific paths; fall back to the original placeholders
# so the script behaves as before when the variables are not set.
data_dir=${DATA_DIR:-PATH_TO_DATASET}
pretrained_s2=${PRETRAINED_S2:-PATH_TO_STAGE2_PRETRAINED_MODEL}

# Build the key=value argument list in the positional parameters (portable to
# plain sh). The two path arguments are quoted so values containing spaces
# stay a single argument.
set -- \
  dataset=somethingv1 \
  "data_dir=${data_dir}" \
  train_stage=3 \
  batch_size=32 \
  num_segments_glancer=8 \
  num_segments_focuser=12 \
  glance_size=224 \
  patch_size=144 \
  random_patch=False \
  epochs=10 \
  backbone_lr=0. \
  fc_lr=0.0005 \
  lr_type=cos \
  workers=8 \
  dropout=0 \
  ppo_continuous=True \
  action_std=0.25 \
  actorcritic_with_bn=True \
  with_glancer=True \
  load_pretrained_s2_fc=True \
  dist_url=tcp://127.0.0.1:8815 \
  eval_freq=1 \
  start_eval=0 \
  print_freq=25 \
  amp=False \
  multiprocessing_distributed=False \
  "pretrained_s2=${pretrained_s2}"

CUDA_VISIBLE_DEVICES=4,5,6,7 python stage3.py "$@"

